import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
// Streaming context with a 10-second batch interval, built on the existing
// SparkContext `sc` (not defined in this snippet -- presumably the one
// provided by spark-shell; confirm before running elsewhere).
val ssc = new StreamingContext(sc, Seconds(10))
// Only show ERROR-level Spark logs so the per-batch output stays readable.
ssc.sparkContext.setLogLevel("ERROR")
// Every text line received from localhost:9999 becomes one stream record.
val ds = ssc.socketTextStream("localhost", 9999)
// Unique visitors per batch interval: the number of distinct client IPs,
// where the IP is the first space-separated field of each access-log line.
// Lines are trimmed and blank ones dropped first: a whitespace-only line
// splits into an empty array (indexing it would throw), and an empty line
// would otherwise be counted as a visitor with the IP "".
val ips = ds.map(_.trim).filter(_.nonEmpty).map(line => line.takeWhile(_ != ' '))
// reduceByKey collapses duplicate IPs into one entry each; the per-IP totals
// are not used, only the number of distinct keys printed for every batch.
ips.map(ip => (ip, 1)).reduceByKey(_ + _).count().print()
// Start receiving data, then block until the streaming job is stopped.
ssc.start()
ssc.awaitTermination()

再打开一个终端，用 nc 监听 9999 端口，并把下面的访问日志逐行粘贴进去作为测试输入：
nc -lk 9999
root@spark:~# nc -lk 9999
58.254.203.49 - - [01/Jul/2021:00:00:08 +0800] "GET /home.php?mod=spacecp&ac=pm&op=checknewpm&rand=1325606408 HTTP/1.1" 200 31 "http://www.itpub.net/forum-72-4.html?ts=30" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.63 Safari/535.7"
58.254.203.49 - - [01/Jul/2021:00:00:08 +0800] "GET /home.php?mod=spacecp&ac=pm&op=checknewpm&rand=1325606408 HTTP/1.1" 200 31 "http://www.itpub.net/forum-72-4.html?ts=30" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.63 Safari/535.7"
58.254.203.49 - - [01/Jul/2021:00:00:08 +0800] "GET /home.php?mod=spacecp&ac=pm&op=checknewpm&rand=1325606408 HTTP/1.1" 200 31 "http://www.itpub.net/forum-72-4.html?ts=30" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.63 Safari/535.7"
58.254.203.49 - - [01/Jul/2021:00:00:08 +0800] "GET /home.php?mod=spacecp&ac=pm&op=checknewpm&rand=1325606408 HTTP/1.1" 200 31 "http://www.itpub.net/forum-72-4.html?ts=30" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.63 Safari/535.7"